James McCreight
This example show retrieving a single (hourly) timeseries from the NWM v2.1 Retrospective zarr store and comparing it with USGS (daily) gage data at the same location.
The data access for a single gage is a bit slow if that is a repeated pattern (points desired are not in few spatial chunks). If you are doing an a at-a-gage analysis, then you might want to look at usage_example_rerechunk_chrtout.ipynb to get your work done on time.
# Using virtual environment in requirements_vis.txt
from climata.usgs import DailyValueIO
import holoviews as hv
import hvplot
import numpy as np
import pandas as pd
import xarray as xr
hv.extension('bokeh')
hv.opts.defaults(
hv.opts.Scatter(width=1200, height=500) )
pd.options.plotting.backend = 'holoviews'
cfs_2_cms = 0.028316846592
usgs_station_id = "13317000" ## Lower Salmon River is an unmanaged flow but you can pick your own.
chrtout_file = '/glade/p/datashare/ishitas/nwm_retro_v2.1/chrtout.zarr'
ds_nwm_chrtout = xr.open_zarr(chrtout_file)
ds_nwm_gage = (
ds_nwm_chrtout
.where(ds_nwm_chrtout.gage_id == f'{usgs_station_id.rjust(15, " ")}'.encode(), drop=True))
# We could delay loading until plotting, but statistics would load the full timeseries....
# This is faster without using a dask cluster on casper
%time streamflow_nwm = ds_nwm_gage.streamflow.load()
CPU times: user 2min 27s, sys: 3min 11s, total: 5min 39s Wall time: 3min 15s
streamflow_nwm_df = streamflow_nwm.squeeze('feature_id').to_dataframe()
# Possible but a bit intensive at hourly resolution
# streamflow_nwm_df.plot.scatter(x='time', y='streamflow')
param_id = "00060" # streamflow in ft3/s
data = DailyValueIO(
start_date=pd.Timestamp(ds_nwm_chrtout.time[0].values).date(),
end_date=pd.Timestamp(ds_nwm_chrtout.time[-1].values).date(),
station=usgs_station_id,
parameter=param_id,)
# create lists of date-flow values
streamflow_usgs_d = {}
for series in data:
streamflow_usgs_d['streamflow_obs'] = [r[1] * cfs_2_cms for r in series.data]
streamflow_usgs_d['time'] = [pd.to_datetime(r[0]) for r in series.data]
streamflow_usgs_df = pd.DataFrame(streamflow_usgs_d).set_index('time')
streamflow_usgs_df.plot.scatter(x='time', y='streamflow_obs') #, height=500, width=1500)
combo_df = (
streamflow_nwm_df
.join(streamflow_usgs_df, how='outer')
.rename(columns={'streamflow': 'NWM v2.1', 'streamflow_obs': 'observed'}))
def plot_water_year(water_year: int):
wy_df = (
combo_df[(combo_df.index >= f'{water_year - 1}-10-01') &
(combo_df.index < f'{water_year}-10-01')])
title = (
f'Water year {water_year}, USGS station {usgs_station_id} : '
f'National Water Model v2.1 retrospective and USGS observed streamflows')
display(
wy_df.plot.scatter(
x='time', y=['NWM v2.1', 'observed'],
title=title)
.opts(
ylabel='cubic meters per second',
xlabel=''))
return None
plot_water_year(2018)
plot_water_year(2003)
plot_water_year(1996)